import pandas as pd
import re
import matplotlib.pyplot as plt
# Load the CSV, skipping the metadata/header rows
df = pd.read_csv('Kanishk_K562_TNFA_Validation_Optical_Results.csv', skiprows=19)
# Select relevant columns and drop rows with missing values
df = df[['Sample', 'Biological Set Name', 'Cq']].dropna()
# Extract numeric time for sorting (e.g., "10 min" -> 10)
def extract_time(s):
match = re.match(r'(\d+)\s*min', str(s))
return int(match.group(1)) if match else -1
# Create a new column for sorting
df['time_numeric'] = df['Biological Set Name'].apply(extract_time)
# Get unique Biological Set Names and sort them by extracted time
sorted_time_names = df.drop_duplicates('Biological Set Name').sort_values('time_numeric')['Biological Set Name']
# Pivot the table
pivot = df.pivot(index='Sample', columns='Biological Set Name', values='Cq')
# Reindex columns to ensure correct order
pivot = pivot.reindex(columns=sorted_time_names)
pivot_table=pivot
pivot_table
Biological Set Name | 0 min | 10 min | 20 min | 30 min | 40 min | 50 min | 60 min | 70 min | 80 min | 90 min | 100 min | 110 min |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Sample | ||||||||||||
ACTB | 20.167819 | 20.057232 | 20.032432 | 20.162431 | 20.184199 | 20.323662 | 20.301582 | 20.242956 | 20.251312 | 20.054655 | 20.240633 | 20.300083 |
GAPDH | 21.432254 | 21.114981 | 21.073946 | 21.137320 | 20.904244 | 21.168053 | 21.009488 | 21.048110 | 20.973592 | 20.769913 | 20.765195 | 20.187977 |
HPRT1 | 29.886828 | 29.958761 | 30.046877 | 30.193573 | 29.970664 | 30.576978 | 30.232387 | 31.233056 | 30.099213 | 30.102881 | 30.227369 | 30.441267 |
ICAM1 | 26.574846 | 26.325522 | 25.723978 | 23.702137 | 22.447005 | 22.015139 | 21.482922 | 21.353166 | 20.881415 | 20.612188 | 20.579137 | 20.791574 |
IER3 | 26.882741 | 26.473618 | 24.288434 | 23.268375 | 22.756264 | 22.959706 | 23.012005 | 22.984936 | 22.782780 | 22.935748 | 22.817715 | 23.383582 |
IL8 | 30.369014 | 30.748940 | 29.307146 | 28.438746 | 27.953410 | 28.294158 | 28.263602 | 28.155412 | 27.720445 | 27.881261 | 27.712730 | 27.757117 |
NFKBIA | 26.347702 | 25.626266 | 23.297582 | 22.464121 | 22.000991 | 22.033389 | 21.984129 | 21.982766 | 21.805209 | 21.947809 | 22.016316 | 22.263624 |
TNFRSF9 | 36.513770 | 35.399690 | 34.987913 | 31.495087 | 30.034275 | 30.142218 | 29.225559 | 28.909810 | 28.081962 | 28.056233 | 28.109982 | 28.587318 |
#ACTB has more stable expression. Selecting it for Normalization. Also had Pipetting error for last GAPDH Sample at 110min. Ran out of RT-PCR Mix
#Getting Delta Ct
#Dropping HPRT1 and GAPDH from Analysis
pivot_table = pivot_table.drop(['HPRT1', 'GAPDH'], axis=0)
pivot_table
Biological Set Name | 0 min | 10 min | 20 min | 30 min | 40 min | 50 min | 60 min | 70 min | 80 min | 90 min | 100 min | 110 min |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Sample | ||||||||||||
ACTB | 20.167819 | 20.057232 | 20.032432 | 20.162431 | 20.184199 | 20.323662 | 20.301582 | 20.242956 | 20.251312 | 20.054655 | 20.240633 | 20.300083 |
ICAM1 | 26.574846 | 26.325522 | 25.723978 | 23.702137 | 22.447005 | 22.015139 | 21.482922 | 21.353166 | 20.881415 | 20.612188 | 20.579137 | 20.791574 |
IER3 | 26.882741 | 26.473618 | 24.288434 | 23.268375 | 22.756264 | 22.959706 | 23.012005 | 22.984936 | 22.782780 | 22.935748 | 22.817715 | 23.383582 |
IL8 | 30.369014 | 30.748940 | 29.307146 | 28.438746 | 27.953410 | 28.294158 | 28.263602 | 28.155412 | 27.720445 | 27.881261 | 27.712730 | 27.757117 |
NFKBIA | 26.347702 | 25.626266 | 23.297582 | 22.464121 | 22.000991 | 22.033389 | 21.984129 | 21.982766 | 21.805209 | 21.947809 | 22.016316 | 22.263624 |
TNFRSF9 | 36.513770 | 35.399690 | 34.987913 | 31.495087 | 30.034275 | 30.142218 | 29.225559 | 28.909810 | 28.081962 | 28.056233 | 28.109982 | 28.587318 |
#Getting Delta Ct
delta_ct = pivot_table.sub(pivot_table.loc['ACTB'], axis=1)
delta_ct
Biological Set Name | 0 min | 10 min | 20 min | 30 min | 40 min | 50 min | 60 min | 70 min | 80 min | 90 min | 100 min | 110 min |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Sample | ||||||||||||
ACTB | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
ICAM1 | 6.407027 | 6.268290 | 5.691546 | 3.539706 | 2.262807 | 1.691477 | 1.181340 | 1.110209 | 0.630103 | 0.557533 | 0.338504 | 0.491491 |
IER3 | 6.714922 | 6.416386 | 4.256002 | 3.105945 | 2.572066 | 2.636043 | 2.710423 | 2.741980 | 2.531469 | 2.881093 | 2.577082 | 3.083500 |
IL8 | 10.201196 | 10.691708 | 9.274714 | 8.276315 | 7.769211 | 7.970496 | 7.962020 | 7.912455 | 7.469134 | 7.826606 | 7.472096 | 7.457035 |
NFKBIA | 6.179883 | 5.569034 | 3.265150 | 2.301690 | 1.816792 | 1.709727 | 1.682548 | 1.739810 | 1.553897 | 1.893154 | 1.775683 | 1.963542 |
TNFRSF9 | 16.345951 | 15.342458 | 14.955481 | 11.332656 | 9.850076 | 9.818556 | 8.923977 | 8.666854 | 7.830650 | 8.001578 | 7.869348 | 8.287236 |
#Setting first timepoint as control
control_delta_ct = delta_ct['0 min']
#Getting Delta Delta Ct
delta_delta_ct = delta_ct.sub(control_delta_ct, axis=0)
delta_delta_ct
Biological Set Name | 0 min | 10 min | 20 min | 30 min | 40 min | 50 min | 60 min | 70 min | 80 min | 90 min | 100 min | 110 min |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Sample | ||||||||||||
ACTB | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
ICAM1 | 0.0 | -0.138737 | -0.715481 | -2.867320 | -4.144220 | -4.715550 | -5.225687 | -5.296818 | -5.776924 | -5.849494 | -6.068523 | -5.915536 |
IER3 | 0.0 | -0.298537 | -2.458920 | -3.608977 | -4.142857 | -4.078879 | -4.004499 | -3.972942 | -4.183453 | -3.833829 | -4.137840 | -3.631422 |
IL8 | 0.0 | 0.490512 | -0.926481 | -1.924881 | -2.431984 | -2.230699 | -2.239175 | -2.288740 | -2.732062 | -2.374590 | -2.729099 | -2.744161 |
NFKBIA | 0.0 | -0.610849 | -2.914733 | -3.878193 | -4.363090 | -4.470156 | -4.497335 | -4.440073 | -4.625986 | -4.286728 | -4.404200 | -4.216341 |
TNFRSF9 | 0.0 | -1.003493 | -1.390470 | -5.013295 | -6.495875 | -6.527395 | -7.421974 | -7.679097 | -8.515300 | -8.344372 | -8.476603 | -8.058715 |
#Getting Fold Change
fold_change = 2 ** (-delta_delta_ct)
fold_change #Values have too high a range will plot -delta_delta_ct only
Biological Set Name | 0 min | 10 min | 20 min | 30 min | 40 min | 50 min | 60 min | 70 min | 80 min | 90 min | 100 min | 110 min |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Sample | ||||||||||||
ACTB | 1.0 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
ICAM1 | 1.0 | 1.100941 | 1.642031 | 7.297086 | 17.682132 | 26.273752 | 37.418690 | 39.309816 | 54.831152 | 57.659786 | 67.113111 | 60.360620 |
IER3 | 1.0 | 1.229896 | 5.498051 | 12.201422 | 17.665425 | 16.899149 | 16.049978 | 15.702719 | 18.169583 | 14.259277 | 17.604103 | 12.392732 |
IL8 | 1.0 | 0.711772 | 1.900635 | 3.797054 | 5.396351 | 4.693615 | 4.721271 | 4.886292 | 6.644045 | 5.185883 | 6.630415 | 6.699999 |
NFKBIA | 1.0 | 1.527158 | 7.540882 | 14.704572 | 20.578849 | 22.164143 | 22.585663 | 21.706772 | 24.692237 | 19.517933 | 21.173676 | 18.588537 |
TNFRSF9 | 1.0 | 2.004848 | 2.621641 | 32.296249 | 90.251244 | 92.244751 | 171.489161 | 204.945572 | 365.898699 | 325.017229 | 356.214517 | 266.633697 |
positive_ddct=-delta_delta_ct
positive_ddct #Changing sign
Biological Set Name | 0 min | 10 min | 20 min | 30 min | 40 min | 50 min | 60 min | 70 min | 80 min | 90 min | 100 min | 110 min |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Sample | ||||||||||||
ACTB | -0.0 | -0.000000 | -0.000000 | -0.000000 | -0.000000 | -0.000000 | -0.000000 | -0.000000 | -0.000000 | -0.000000 | -0.000000 | -0.000000 |
ICAM1 | -0.0 | 0.138737 | 0.715481 | 2.867320 | 4.144220 | 4.715550 | 5.225687 | 5.296818 | 5.776924 | 5.849494 | 6.068523 | 5.915536 |
IER3 | -0.0 | 0.298537 | 2.458920 | 3.608977 | 4.142857 | 4.078879 | 4.004499 | 3.972942 | 4.183453 | 3.833829 | 4.137840 | 3.631422 |
IL8 | -0.0 | -0.490512 | 0.926481 | 1.924881 | 2.431984 | 2.230699 | 2.239175 | 2.288740 | 2.732062 | 2.374590 | 2.729099 | 2.744161 |
NFKBIA | -0.0 | 0.610849 | 2.914733 | 3.878193 | 4.363090 | 4.470156 | 4.497335 | 4.440073 | 4.625986 | 4.286728 | 4.404200 | 4.216341 |
TNFRSF9 | -0.0 | 1.003493 | 1.390470 | 5.013295 | 6.495875 | 6.527395 | 7.421974 | 7.679097 | 8.515300 | 8.344372 | 8.476603 | 8.058715 |
positive_ddct.T.plot(xlabel="Time", ylabel="Log Fold Change")
plt.savefig("qPCR_GenePlots_Log_Fold_Validation.svg", format='svg')
So a Time Delay before the activation of TNFRSF9 and ICAM1 is evident in the qPCR data as well.
positive_ddct.T[["NFKBIA","IL8"]].plot(xlabel="Time", ylabel="Log Fold Change")
plt.savefig("qPCR_GenePlots_Log_Fold_Validation_NFKBIA_IL8.svg", format='svg')
positive_ddct.T[["NFKBIA","ICAM1"]].plot(xlabel="Time", ylabel="Log Fold Change")
plt.savefig("qPCR_GenePlots_Log_Fold_Validation_NFKBIA_ICAM1.svg", format='svg')
positive_ddct.T[["NFKBIA","TNFRSF9"]].plot(xlabel="Time", ylabel="Log Fold Change")
plt.savefig("qPCR_GenePlots_Log_Fold_Validation_NFKBIA_TNFRSF9.svg", format='svg')
positive_ddct.T[["NFKBIA","IER3"]].plot(xlabel="Time", ylabel="Log Fold Change")
plt.savefig("qPCR_GenePlots_Log_Fold_Validation_NFKBIA_IER3.svg", format='svg')
(100/pivot_table).T.plot(xlabel="Time",ylabel="Relative Concentration 100/Cq")
plt.savefig("qPCR_GenePlots_Relative_concentration_100DivCq.svg", format='svg')
scK562_genes=pd.read_csv("Selected_Genes_scK562.csv",index_col=0)
scK562_genes
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
ACTB | 2.994008 | 2.930615 | 2.863599 | 2.971982 | 2.928550 | 2.970020 | 2.926710 | 3.000689 | 2.948781 | 2.836841 | 2.892733 | 3.007348 |
ICAM1 | 0.086006 | 0.082477 | 0.085802 | 0.225258 | 0.443590 | 0.515901 | 0.764676 | 0.888616 | 0.891994 | 0.867710 | 0.750336 | 0.949493 |
IER3 | 0.058151 | 0.048934 | 0.210205 | 0.516017 | 0.562482 | 0.531894 | 0.433471 | 0.412251 | 0.382959 | 0.346042 | 0.350955 | 0.289744 |
IL8 | 0.106400 | 0.089052 | 0.417153 | 0.731696 | 1.071399 | 1.079914 | 1.080593 | 1.097727 | 1.061795 | 1.109638 | 0.992169 | 1.244911 |
NFKBIA | 0.573597 | 0.612698 | 1.392816 | 2.252066 | 2.594812 | 2.718964 | 2.547843 | 2.437945 | 2.236336 | 2.259228 | 2.101444 | 2.186365 |
TNFRSF9 | 0.014858 | 0.003576 | 0.019713 | 0.028735 | 0.051805 | 0.132589 | 0.175997 | 0.247497 | 0.379532 | 0.342595 | 0.417288 | 0.442723 |
scK562_genes.columns=pivot_table.columns
scK562_genes.index=pivot_table.index
scK562_genes
Biological Set Name | 0 min | 10 min | 20 min | 30 min | 40 min | 50 min | 60 min | 70 min | 80 min | 90 min | 100 min | 110 min |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Sample | ||||||||||||
ACTB | 2.994008 | 2.930615 | 2.863599 | 2.971982 | 2.928550 | 2.970020 | 2.926710 | 3.000689 | 2.948781 | 2.836841 | 2.892733 | 3.007348 |
ICAM1 | 0.086006 | 0.082477 | 0.085802 | 0.225258 | 0.443590 | 0.515901 | 0.764676 | 0.888616 | 0.891994 | 0.867710 | 0.750336 | 0.949493 |
IER3 | 0.058151 | 0.048934 | 0.210205 | 0.516017 | 0.562482 | 0.531894 | 0.433471 | 0.412251 | 0.382959 | 0.346042 | 0.350955 | 0.289744 |
IL8 | 0.106400 | 0.089052 | 0.417153 | 0.731696 | 1.071399 | 1.079914 | 1.080593 | 1.097727 | 1.061795 | 1.109638 | 0.992169 | 1.244911 |
NFKBIA | 0.573597 | 0.612698 | 1.392816 | 2.252066 | 2.594812 | 2.718964 | 2.547843 | 2.437945 | 2.236336 | 2.259228 | 2.101444 | 2.186365 |
TNFRSF9 | 0.014858 | 0.003576 | 0.019713 | 0.028735 | 0.051805 | 0.132589 | 0.175997 | 0.247497 | 0.379532 | 0.342595 | 0.417288 | 0.442723 |
scK562_genes.T.corrwith((100/pivot_table).T)
Sample ACTB -0.605591 ICAM1 0.957025 IER3 0.878033 IL8 0.959505 NFKBIA 0.955798 TNFRSF9 0.839289 dtype: float64
scK562_genes.T.corrwith((pivot_table).T)
Sample ACTB 0.604427 ICAM1 -0.946985 IER3 -0.874428 IL8 -0.959311 NFKBIA -0.951711 TNFRSF9 -0.811246 dtype: float64
100/pivot_table
Biological Set Name | 0 min | 10 min | 20 min | 30 min | 40 min | 50 min | 60 min | 70 min | 80 min | 90 min | 100 min | 110 min |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Sample | ||||||||||||
ACTB | 4.958394 | 4.985733 | 4.991905 | 4.959719 | 4.954371 | 4.920373 | 4.925725 | 4.939990 | 4.937952 | 4.986373 | 4.940557 | 4.926088 |
ICAM1 | 3.762957 | 3.798595 | 3.887424 | 4.219029 | 4.454937 | 4.542329 | 4.654860 | 4.683146 | 4.788948 | 4.851498 | 4.859290 | 4.809641 |
IER3 | 3.719859 | 3.777345 | 4.117186 | 4.297679 | 4.394394 | 4.355457 | 4.345558 | 4.350676 | 4.389280 | 4.360006 | 4.382560 | 4.276505 |
IL8 | 3.292830 | 3.252145 | 3.412137 | 3.516329 | 3.577381 | 3.534298 | 3.538119 | 3.551715 | 3.607446 | 3.586638 | 3.608450 | 3.602680 |
NFKBIA | 3.795397 | 3.902246 | 4.292291 | 4.451543 | 4.545250 | 4.538566 | 4.548736 | 4.549018 | 4.586060 | 4.556263 | 4.542086 | 4.491632 |
TNFRSF9 | 2.738693 | 2.824883 | 2.858130 | 3.175098 | 3.329529 | 3.317606 | 3.421663 | 3.459033 | 3.561005 | 3.564270 | 3.557455 | 3.498055 |
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
# Data from the first table (expression levels over time for sample 1)
data1 = {
'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
'ACTB': [2.994008, 2.9306147, 2.8635988, 2.9719822, 2.9285502, 2.9700196, 2.92671, 3.000689, 2.9487808, 2.8368413, 2.892733, 3.0073476],
'ICAM1': [0.08600627, 0.08247658, 0.085802, 0.2252579, 0.44359004, 0.51590145, 0.7646757, 0.8886159, 0.8919943, 0.86770964, 0.7503365, 0.9494935],
'IER3': [0.058151398, 0.04893402, 0.21020496, 0.5160171, 0.56248176, 0.5318944, 0.43347073, 0.41225097, 0.3829586, 0.34604213, 0.3509554, 0.2897437],
'IL8': [0.10639978, 0.08905216, 0.41715282, 0.73169607, 1.0713985, 1.0799139, 1.0805933, 1.0977275, 1.0617948, 1.1096383, 0.99216866, 1.2449107],
'NFKBIA': [0.5735968, 0.612698, 1.3928156, 2.2520661, 2.5948122, 2.7189639, 2.5478435, 2.437945, 2.2363355, 2.2592278, 2.101444, 2.1863654],
'TNFRSF9': [0.014857934, 0.0035758177, 0.019713422, 0.028734611, 0.051805247, 0.13258937, 0.17599699, 0.2474967, 0.37953165, 0.34259483, 0.41728783, 0.44272283]
}
# Data from the second table (expression levels over time for sample 2)
data2 = {
'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
'ACTB': [4.958394, 4.985733, 4.991905, 4.959719, 4.954371, 4.920373, 4.925725, 4.93999, 4.937952, 4.986373, 4.940557, 4.926088],
'ICAM1': [3.762957, 3.798595, 3.887424, 4.219029, 4.454937, 4.542329, 4.65486, 4.683146, 4.788948, 4.851498, 4.85929, 4.809641],
'IER3': [3.719859, 3.777345, 4.117186, 4.297679, 4.394394, 4.355457, 4.345558, 4.350676, 4.38928, 4.360006, 4.38256, 4.276505],
'IL8': [3.29283, 3.252145, 3.412137, 3.516329, 3.577381, 3.534298, 3.538119, 3.551715, 3.607446, 3.586638, 3.60845, 3.60268],
'NFKBIA': [3.795397, 3.902246, 4.292291, 4.451543, 4.54525, 4.538566, 4.548736, 4.549018, 4.58606, 4.556263, 4.542086, 4.491632],
'TNFRSF9': [2.738693, 2.824883, 2.85813, 3.175098, 3.329529, 3.317606, 3.421663, 3.459033, 3.561005, 3.56427, 3.557455, 3.498055]
}
# Convert to DataFrames
df1 = pd.DataFrame(data1).set_index('Time')
df2 = pd.DataFrame(data2).set_index('Time')
# Plotting gene expression over time for the first dataset
plt.figure(figsize=(12, 6))
for gene in df1.columns:
plt.plot(df1.index, df1[gene], marker='o', label=gene)
plt.title('Gene Expression Levels Over Time - ChronoSeq')
plt.xlabel('Time')
plt.ylabel('Expression Level')
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
# Plotting gene expression over time for the second dataset
plt.figure(figsize=(12, 6))
for gene in df2.columns:
plt.plot(df2.index, df2[gene], marker='o', label=gene)
plt.title('Gene Expression Levels Over Time - qPCR')
plt.xlabel('Time')
plt.ylabel('Expression Level')
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
# Calculate Pearson correlation and p-values for each gene
correlations = {}
p_values = {}
genes = df1.columns
for gene in genes:
# Ensure we are correlating the numerical values
corr, p_val = pearsonr(df1[gene].astype(float), df2[gene].astype(float))
correlations[gene] = corr
p_values[gene] = p_val
# Output correlation and p-values dictionaries
print("Correlations:", correlations)
print("P-values:", p_values)
# Plotting correlation coefficients
plt.figure(figsize=(10, 5))
plt.bar(correlations.keys(), correlations.values(), color='skyblue')
plt.title('Pearson Correlation Coefficients Between Gene Expression Levels')
plt.xlabel('Genes')
plt.ylabel('Correlation Coefficient')
plt.ylim(-1, 1) # Correlation coefficient ranges from -1 to 1
plt.grid(axis='y', linestyle='--')
plt.show()
# Plotting p-values on a logarithmic scale
plt.figure(figsize=(10, 5))
plt.bar(p_values.keys(), p_values.values(), color='salmon')
plt.title('P-values for Correlation Tests Between Gene Expression Levels')
plt.xlabel('Genes')
plt.ylabel('P-value (log scale)')
plt.yscale('log') # Use logarithmic scale for p-values as they can be very small
plt.grid(axis='y', linestyle='--')
plt.show()
Correlations: {'ACTB': -0.6055940421318541, 'ICAM1': 0.9570251356459543, 'IER3': 0.878033279489262, 'IL8': 0.9595046097297201, 'NFKBIA': 0.9557978646892042, 'TNFRSF9': 0.8392894913265406} P-values: {'ACTB': 0.03690132234849123, 'ICAM1': 1.0738900866252006e-06, 'IER3': 0.00017261060928962815, 'IL8': 8.011961244752918e-07, 'NFKBIA': 1.2336734402499755e-06, 'TNFRSF9': 0.000640405351540273}
correlations
{'ACTB': -0.6055940421318541, 'ICAM1': 0.9570251356459543, 'IER3': 0.878033279489262, 'IL8': 0.9595046097297201, 'NFKBIA': 0.9557978646892042, 'TNFRSF9': 0.8392894913265406}
p_values
{'ACTB': 0.03690132234849123, 'ICAM1': 1.0738900866252006e-06, 'IER3': 0.00017261060928962815, 'IL8': 8.011961244752918e-07, 'NFKBIA': 1.2336734402499755e-06, 'TNFRSF9': 0.000640405351540273}
import numpy as np
import pandas as pd
from scipy.spatial.distance import euclidean
from scipy.signal import correlate
from scipy.stats import pearsonr
# Data from the first table (expression levels over time for sample 1)
data1 = {
'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
'ACTB': [2.994008, 2.9306147, 2.8635988, 2.9719822, 2.9285502, 2.9700196, 2.92671, 3.000689, 2.9487808, 2.8368413, 2.892733, 3.0073476],
'ICAM1': [0.08600627, 0.08247658, 0.085802, 0.2252579, 0.44359004, 0.51590145, 0.7646757, 0.8886159, 0.8919943, 0.86770964, 0.7503365, 0.9494935],
'IER3': [0.058151398, 0.04893402, 0.21020496, 0.5160171, 0.56248176, 0.5318944, 0.43347073, 0.41225097, 0.3829586, 0.34604213, 0.3509554, 0.2897437],
'IL8': [0.10639978, 0.08905216, 0.41715282, 0.73169607, 1.0713985, 1.0799139, 1.0805933, 1.0977275, 1.0617948, 1.1096383, 0.99216866, 1.2449107],
'NFKBIA': [0.5735968, 0.612698, 1.3928156, 2.2520661, 2.5948122, 2.7189639, 2.5478435, 2.437945, 2.2363355, 2.2592278, 2.101444, 2.1863654],
'TNFRSF9': [0.014857934, 0.0035758177, 0.019713422, 0.028734611, 0.051805247, 0.13258937, 0.17599699, 0.2474967, 0.37953165, 0.34259483, 0.41728783, 0.44272283]
}
# Data from the second table (expression levels over time for sample 2)
data2 = {
'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
'ACTB': [4.958394, 4.985733, 4.991905, 4.959719, 4.954371, 4.920373, 4.925725, 4.93999, 4.937952, 4.986373, 4.940557, 4.926088],
'ICAM1': [3.762957, 3.798595, 3.887424, 4.219029, 4.454937, 4.542329, 4.65486, 4.683146, 4.788948, 4.851498, 4.85929, 4.809641],
'IER3': [3.719859, 3.777345, 4.117186, 4.297679, 4.394394, 4.355457, 4.345558, 4.350676, 4.38928, 4.360006, 4.38256, 4.276505],
'IL8': [3.29283, 3.252145, 3.412137, 3.516329, 3.577381, 3.534298, 3.538119, 3.551715, 3.607446, 3.586638, 3.60845, 3.60268],
'NFKBIA': [3.795397, 3.902246, 4.292291, 4.451543, 4.54525, 4.538566, 4.548736, 4.549018, 4.58606, 4.556263, 4.542086, 4.491632],
'TNFRSF9': [2.738693, 2.824883, 2.85813, 3.175098, 3.329529, 3.317606, 3.421663, 3.459033, 3.561005, 3.56427, 3.557455, 3.498055]
}
df1 = pd.DataFrame(data1).set_index('Time')
df2 = pd.DataFrame(data2).set_index('Time')
# DTW approximation using Euclidean distance and lagged cross-correlation
# Since dtw package is not available, we will use cross-correlation to approximate similarity and lag
dtw_approx_results = {}
ccf_results = {}
for gene in df1.columns:
x = df1[gene].values
y = df2[gene].values
# Normalize the series
x_norm = (x - np.mean(x)) / np.std(x)
y_norm = (y - np.mean(y)) / np.std(y)
# Cross-correlation
ccf = correlate(x_norm, y_norm, mode='full')
lags = np.arange(-len(x_norm) + 1, len(x_norm))
max_ccf_index = np.argmax(ccf)
max_ccf = ccf[max_ccf_index]
lag_at_max_ccf = lags[max_ccf_index]
# Pearson correlation as a proxy for similarity
r, p = pearsonr(x, y)
dtw_approx_results[gene] = {'max_ccf': max_ccf, 'lag': lag_at_max_ccf, 'pearson_r': r, 'pearson_p': p}
ccf_results[gene] = {'ccf': ccf, 'lags': lags}
dtw_approx_results
{'ACTB': {'max_ccf': 4.566926973563788, 'lag': -2, 'pearson_r': -0.6055940421318541, 'pearson_p': 0.03690132234849123}, 'ICAM1': {'max_ccf': 11.48430162775145, 'lag': 0, 'pearson_r': 0.9570251356459543, 'pearson_p': 1.0738900866252006e-06}, 'IER3': {'max_ccf': 10.536399353871143, 'lag': 0, 'pearson_r': 0.878033279489262, 'pearson_p': 0.00017261060928962815}, 'IL8': {'max_ccf': 11.51405531675664, 'lag': 0, 'pearson_r': 0.9595046097297201, 'pearson_p': 8.011961244752918e-07}, 'NFKBIA': {'max_ccf': 11.46957437627045, 'lag': 0, 'pearson_r': 0.9557978646892042, 'pearson_p': 1.2336734402499755e-06}, 'TNFRSF9': {'max_ccf': 10.071473895918485, 'lag': 0, 'pearson_r': 0.8392894913265406, 'pearson_p': 0.000640405351540273}}
import numpy as np
import pandas as pd
from scipy.spatial.distance import cosine
# Data from the first table (expression levels over time for sample 1)
data1 = {
'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
'ACTB': [2.994008, 2.9306147, 2.8635988, 2.9719822, 2.9285502, 2.9700196, 2.92671, 3.000689, 2.9487808, 2.8368413, 2.892733, 3.0073476],
'ICAM1': [0.08600627, 0.08247658, 0.085802, 0.2252579, 0.44359004, 0.51590145, 0.7646757, 0.8886159, 0.8919943, 0.86770964, 0.7503365, 0.9494935],
'IER3': [0.058151398, 0.04893402, 0.21020496, 0.5160171, 0.56248176, 0.5318944, 0.43347073, 0.41225097, 0.3829586, 0.34604213, 0.3509554, 0.2897437],
'IL8': [0.10639978, 0.08905216, 0.41715282, 0.73169607, 1.0713985, 1.0799139, 1.0805933, 1.0977275, 1.0617948, 1.1096383, 0.99216866, 1.2449107],
'NFKBIA': [0.5735968, 0.612698, 1.3928156, 2.2520661, 2.5948122, 2.7189639, 2.5478435, 2.437945, 2.2363355, 2.2592278, 2.101444, 2.1863654],
'TNFRSF9': [0.014857934, 0.0035758177, 0.019713422, 0.028734611, 0.051805247, 0.13258937, 0.17599699, 0.2474967, 0.37953165, 0.34259483, 0.41728783, 0.44272283]
}
# Data from the second table (expression levels over time for sample 2)
data2 = {
'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min', '60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
'ACTB': [4.958394, 4.985733, 4.991905, 4.959719, 4.954371, 4.920373, 4.925725, 4.93999, 4.937952, 4.986373, 4.940557, 4.926088],
'ICAM1': [3.762957, 3.798595, 3.887424, 4.219029, 4.454937, 4.542329, 4.65486, 4.683146, 4.788948, 4.851498, 4.85929, 4.809641],
'IER3': [3.719859, 3.777345, 4.117186, 4.297679, 4.394394, 4.355457, 4.345558, 4.350676, 4.38928, 4.360006, 4.38256, 4.276505],
'IL8': [3.29283, 3.252145, 3.412137, 3.516329, 3.577381, 3.534298, 3.538119, 3.551715, 3.607446, 3.586638, 3.60845, 3.60268],
'NFKBIA': [3.795397, 3.902246, 4.292291, 4.451543, 4.54525, 4.538566, 4.548736, 4.549018, 4.58606, 4.556263, 4.542086, 4.491632],
'TNFRSF9': [2.738693, 2.824883, 2.85813, 3.175098, 3.329529, 3.317606, 3.421663, 3.459033, 3.561005, 3.56427, 3.557455, 3.498055]
}
# Convert to DataFrames
df1 = pd.DataFrame(data1).set_index('Time')
df2 = pd.DataFrame(data2).set_index('Time')
# Calculate cosine distance for each gene
cosine_distances = {}
genes = df1.columns
print("Cosine Distances:")
for gene in genes:
# Extract expression values for the current gene from both datasets
vector1 = df1[gene].values
vector2 = df2[gene].values
# Calculate cosine distance
# cosine() function returns 1 - cosine_similarity
# If vectors are identical, distance is 0.
# If vectors are orthogonal, distance is 1.
# If vectors are opposite, distance is 2 (for data that can be negative).
# For positive data like gene expression, distance will be between 0 and 1.
dist = cosine(vector1, vector2)
cosine_distances[gene] = dist
print(f"{gene}: {dist:.6f}")
# You can also store them in a DataFrame for better presentation if needed
# cosine_distances_df = pd.DataFrame(list(cosine_distances.items()), columns=['Gene', 'Cosine_Distance'])
# print("\nCosine Distances DataFrame:")
# print(cosine_distances_df)
Cosine Distances: ACTB: 0.000219 ICAM1: 0.105884 IER3: 0.076341 IL8: 0.080463 NFKBIA: 0.039885 TNFRSF9: 0.199376
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import cosine
def create_cosine_distance_bar_chart(cosine_distances_dict, save_filename='cosine_distance_bar_chart.svg'):
"""
Create a publication-quality bar chart of cosine distances between ChronoSeq and qPCR methods
Parameters:
cosine_distances_dict: dict with gene names as keys and cosine distances as values
save_filename: string, filename to save the plot
"""
# Set publication style parameters
plt.rcParams.update({
'font.size': 12,
'font.family': 'Arial',
'axes.linewidth': 1.2,
'axes.spines.top': False,
'axes.spines.right': False,
'xtick.major.size': 4,
'ytick.major.size': 4,
'figure.dpi': 300,
'savefig.dpi': 300,
'savefig.bbox': 'tight'
})
# Gene colors for consistency with other plots
gene_colors = {
'ACTB': '#1f77b4', # Blue - housekeeping
'ICAM1': '#ff7f0e', # Orange
'IER3': '#2ca02c', # Green
'IL8': '#d62728', # Red
'NFKBIA': '#9467bd', # Purple
'TNFRSF9': '#8c564b' # Brown
}
# Extract data for plotting
genes = list(cosine_distances_dict.keys())
distances = list(cosine_distances_dict.values())
colors = [gene_colors[gene] for gene in genes]
# Create the figure and axis
fig, ax = plt.subplots(1, 1, figsize=(10, 6))
# Create the bar chart
bars = ax.bar(genes, distances, color=colors, alpha=0.8,
edgecolor='black', linewidth=1)
# Add value labels on top of bars
for i, (bar, distance) in enumerate(zip(bars, distances)):
height = bar.get_height()
ax.text(bar.get_x() + bar.get_width()/2., height + 0.005,
f'{distance:.3f}', ha='center', va='bottom',
fontsize=11, fontweight='bold')
# Customize the plot
ax.set_ylabel('Cosine Distance', fontsize=14, fontweight='bold')
ax.set_xlabel('Gene', fontsize=14, fontweight='bold')
ax.set_title('Cosine Distance Between ChronoSeq and qPCR\nTemporal Expression Patterns',
fontsize=16, fontweight='bold', pad=20)
# Set y-axis limits with some padding
max_distance = max(distances)
ax.set_ylim(0, max_distance * 1.15)
'''# Add a reference line at 0.1 (optional threshold)
ax.axhline(y=0.1, color='gray', linestyle='--', alpha=0.5, linewidth=1)
ax.text(len(genes)-1, 0.1 + 0.01, 'Reference (0.1)',
ha='right', va='bottom', fontsize=10, color='gray')'''
# Add grid for better readability
ax.grid(True, axis='y', alpha=0.3, linestyle='-', linewidth=0.5)
ax.set_axisbelow(True)
# Rotate x-axis labels if needed for better readability
plt.xticks(rotation=0, ha='center')
# Adjust layout and save
plt.tight_layout()
plt.savefig(save_filename, dpi=300, bbox_inches='tight')
plt.show()
return fig, ax
def calculate_and_plot_cosine_distances(chronoseq_data, qpcr_data):
"""
Calculate cosine distances and create bar chart from raw data
Parameters:
chronoseq_data: dict with gene expression data from ChronoSeq
qpcr_data: dict with gene expression data from qPCR
"""
# Convert to DataFrames
df1 = pd.DataFrame(chronoseq_data).set_index('Time')
df2 = pd.DataFrame(qpcr_data).set_index('Time')
# Calculate cosine distances
cosine_distances = {}
genes = df1.columns
print("Calculating cosine distances...")
for gene in genes:
vector1 = df1[gene].values
vector2 = df2[gene].values
dist = cosine(vector1, vector2)
cosine_distances[gene] = dist
print(f"{gene}: {dist:.6f}")
# Create the bar chart
fig, ax = create_cosine_distance_bar_chart(cosine_distances)
return cosine_distances, fig, ax
# Example usage with your data
if __name__ == "__main__":
# Your ChronoSeq data (scRNA-seq pseudobulk)
chronoseq_data = {
'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min',
'60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
'ACTB': [2.994008, 2.9306147, 2.8635988, 2.9719822, 2.9285502, 2.9700196,
2.92671, 3.000689, 2.9487808, 2.8368413, 2.892733, 3.0073476],
'ICAM1': [0.08600627, 0.08247658, 0.085802, 0.2252579, 0.44359004, 0.51590145,
0.7646757, 0.8886159, 0.8919943, 0.86770964, 0.7503365, 0.9494935],
'IER3': [0.058151398, 0.04893402, 0.21020496, 0.5160171, 0.56248176, 0.5318944,
0.43347073, 0.41225097, 0.3829586, 0.34604213, 0.3509554, 0.2897437],
'IL8': [0.10639978, 0.08905216, 0.41715282, 0.73169607, 1.0713985, 1.0799139,
1.0805933, 1.0977275, 1.0617948, 1.1096383, 0.99216866, 1.2449107],
'NFKBIA': [0.5735968, 0.612698, 1.3928156, 2.2520661, 2.5948122, 2.7189639,
2.5478435, 2.437945, 2.2363355, 2.2592278, 2.101444, 2.1863654],
'TNFRSF9': [0.014857934, 0.0035758177, 0.019713422, 0.028734611, 0.051805247,
0.13258937, 0.17599699, 0.2474967, 0.37953165, 0.34259483,
0.41728783, 0.44272283]
}
# Your qPCR data (100/Cq values)
qpcr_data = {
'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min',
'60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
'ACTB': [4.958394, 4.985733, 4.991905, 4.959719, 4.954371, 4.920373,
4.925725, 4.93999, 4.937952, 4.986373, 4.940557, 4.926088],
'ICAM1': [3.762957, 3.798595, 3.887424, 4.219029, 4.454937, 4.542329,
4.65486, 4.683146, 4.788948, 4.851498, 4.85929, 4.809641],
'IER3': [3.719859, 3.777345, 4.117186, 4.297679, 4.394394, 4.355457,
4.345558, 4.350676, 4.38928, 4.360006, 4.38256, 4.276505],
'IL8': [3.29283, 3.252145, 3.412137, 3.516329, 3.577381, 3.534298,
3.538119, 3.551715, 3.607446, 3.586638, 3.60845, 3.60268],
'NFKBIA': [3.795397, 3.902246, 4.292291, 4.451543, 4.54525, 4.538566,
4.548736, 4.549018, 4.58606, 4.556263, 4.542086, 4.491632],
'TNFRSF9': [2.738693, 2.824883, 2.85813, 3.175098, 3.329529, 3.317606,
3.421663, 3.459033, 3.561005, 3.56427, 3.557455, 3.498055]
}
# Calculate and plot
distances, fig, ax = calculate_and_plot_cosine_distances(chronoseq_data, qpcr_data)
print("\nBar chart created successfully!")
print("File saved as: cosine_distance_bar_chart.svg")
# Alternative: If you already have calculated distances
# precalculated_distances = {
# 'ACTB': 0.000219,
# 'ICAM1': 0.105884,
# 'IER3': 0.076341,
# 'IL8': 0.080463,
# 'NFKBIA': 0.039885,
# 'TNFRSF9': 0.199376
# }
# fig, ax = create_cosine_distance_bar_chart(precalculated_distances)
findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans. findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans. findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans. findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans.
Calculating cosine distances... ACTB: 0.000219 ICAM1: 0.105884 IER3: 0.076341 IL8: 0.080463 NFKBIA: 0.039885 TNFRSF9: 0.199376
Bar chart created successfully! File saved as: cosine_distance_bar_chart.png
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import cosine
def create_cosine_similarity_bar_chart(cosine_similarities_dict, save_filename='cosine_similarity_bar_chart.svg'):
"""
Create a publication-quality bar chart of cosine similarities between ChronoSeq and qPCR methods
Parameters:
cosine_similarities_dict: dict with gene names as keys and cosine similarities as values
save_filename: string, filename to save the plot
"""
# Set publication style parameters
plt.rcParams.update({
'font.size': 12,
'font.family': 'Arial',
'axes.linewidth': 1.2,
'axes.spines.top': False,
'axes.spines.right': False,
'xtick.major.size': 4,
'ytick.major.size': 4,
'figure.dpi': 300,
'savefig.dpi': 300,
'savefig.bbox': 'tight'
})
# Gene colors for consistency with other plots
gene_colors = {
'ACTB': '#1f77b4', # Blue - housekeeping
'ICAM1': '#ff7f0e', # Orange
'IER3': '#2ca02c', # Green
'IL8': '#d62728', # Red
'NFKBIA': '#9467bd', # Purple
'TNFRSF9': '#8c564b' # Brown
}
# Extract data for plotting
genes = list(cosine_similarities_dict.keys())
similarities = list(cosine_similarities_dict.values())
colors = [gene_colors[gene] for gene in genes]
# Create the figure and axis
fig, ax = plt.subplots(1, 1, figsize=(10, 6))
# Create the bar chart
bars = ax.bar(genes, similarities, color=colors, alpha=0.8,
edgecolor='black', linewidth=1)
# Add value labels on top of bars
for i, (bar, similarity) in enumerate(zip(bars, similarities)):
height = bar.get_height()
ax.text(bar.get_x() + bar.get_width()/2., height + 0.005,
f'{similarity:.3f}', ha='center', va='bottom',
fontsize=11, fontweight='bold')
# Customize the plot
ax.set_ylabel('Cosine Similarity', fontsize=14, fontweight='bold')
ax.set_xlabel('Gene', fontsize=14, fontweight='bold')
ax.set_title('Cosine Similarity Between ChronoSeq and qPCR\nTemporal Expression Patterns',
fontsize=16, fontweight='bold', pad=30)
# Set y-axis limits for better resolution (focus on the range of data)
min_similarity = min(similarities)
max_similarity = max(similarities)
#y_range = max_similarity - min_similarity
#ax.set_ylim(max(0, min_similarity - 0.05), min(1.0, max_similarity + 0.02))
# Add reference lines for interpretation
ax.axhline(y=0.9, color='red', linestyle='--', alpha=0.6, linewidth=1.5)
ax.text(len(genes)-0.5, 0.9 + 0.01, 'High Similarity (0.9)',
ha='right', va='bottom', fontsize=10, color='red', fontweight='bold')
ax.axhline(y=0.8, color='orange', linestyle='--', alpha=0.5, linewidth=1)
ax.text(len(genes)-0.5, 0.78, 'Good Similarity (0.8)',
ha='right', va='top', fontsize=10, color='darkorange', fontweight='bold')
# Add grid for better readability
ax.grid(True, axis='y', alpha=0.3, linestyle='-', linewidth=0.5)
ax.set_axisbelow(True)
# Rotate x-axis labels if needed for better readability
plt.xticks(rotation=0, ha='center')
# Add interpretation text box higher up (top center)
interpretation_text = (
"Higher bars = Better agreement\n"
"Range: 0.0 (opposite) to 1.0 (identical)"
)
ax.text(0.5, 0.99, interpretation_text, transform=ax.transAxes,
fontsize=9, verticalalignment='bottom', horizontalalignment='center',
bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))
# Adjust layout and save
plt.tight_layout()
plt.savefig(save_filename, format='svg', bbox_inches='tight')
plt.show()
return fig, ax
def calculate_and_plot_cosine_similarities(chronoseq_data, qpcr_data):
"""
Calculate cosine similarities and create bar chart from raw data
Parameters:
chronoseq_data: dict with gene expression data from ChronoSeq
qpcr_data: dict with gene expression data from qPCR
"""
# Convert to DataFrames
df1 = pd.DataFrame(chronoseq_data).set_index('Time')
df2 = pd.DataFrame(qpcr_data).set_index('Time')
# Calculate cosine distances and convert to similarities
cosine_similarities = {}
genes = df1.columns
print("Calculating cosine similarities...")
print("=" * 50)
for gene in genes:
vector1 = df1[gene].values
vector2 = df2[gene].values
distance = cosine(vector1, vector2)
similarity = 1 - distance # Convert distance to similarity
cosine_similarities[gene] = similarity
print(f"{gene:8}: {similarity:.6f} (distance: {distance:.6f})")
print(f"\nSummary:")
print(f"Mean similarity: {np.mean(list(cosine_similarities.values())):.3f}")
print(f"Std similarity: {np.std(list(cosine_similarities.values())):.3f}")
print(f"Min similarity: {min(cosine_similarities.values()):.3f}")
print(f"Max similarity: {max(cosine_similarities.values()):.3f}")
# Create the bar chart
fig, ax = create_cosine_similarity_bar_chart(cosine_similarities)
return cosine_similarities, fig, ax
def convert_distances_to_similarities(cosine_distances_dict):
"""
Convert cosine distances to cosine similarities
Parameters:
cosine_distances_dict: dict with gene names as keys and cosine distances as values
Returns:
dict with gene names as keys and cosine similarities as values
"""
cosine_similarities = {}
for gene, distance in cosine_distances_dict.items():
similarity = 1 - distance
cosine_similarities[gene] = similarity
return cosine_similarities
# Example usage with your data
if __name__ == "__main__":
# Your ChronoSeq data (scRNA-seq pseudobulk)
chronoseq_data = {
'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min',
'60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
'ACTB': [2.994008, 2.9306147, 2.8635988, 2.9719822, 2.9285502, 2.9700196,
2.92671, 3.000689, 2.9487808, 2.8368413, 2.892733, 3.0073476],
'ICAM1': [0.08600627, 0.08247658, 0.085802, 0.2252579, 0.44359004, 0.51590145,
0.7646757, 0.8886159, 0.8919943, 0.86770964, 0.7503365, 0.9494935],
'IER3': [0.058151398, 0.04893402, 0.21020496, 0.5160171, 0.56248176, 0.5318944,
0.43347073, 0.41225097, 0.3829586, 0.34604213, 0.3509554, 0.2897437],
'IL8': [0.10639978, 0.08905216, 0.41715282, 0.73169607, 1.0713985, 1.0799139,
1.0805933, 1.0977275, 1.0617948, 1.1096383, 0.99216866, 1.2449107],
'NFKBIA': [0.5735968, 0.612698, 1.3928156, 2.2520661, 2.5948122, 2.7189639,
2.5478435, 2.437945, 2.2363355, 2.2592278, 2.101444, 2.1863654],
'TNFRSF9': [0.014857934, 0.0035758177, 0.019713422, 0.028734611, 0.051805247,
0.13258937, 0.17599699, 0.2474967, 0.37953165, 0.34259483,
0.41728783, 0.44272283]
}
# Your qPCR data (100/Cq values)
qpcr_data = {
'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min',
'60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
'ACTB': [4.958394, 4.985733, 4.991905, 4.959719, 4.954371, 4.920373,
4.925725, 4.93999, 4.937952, 4.986373, 4.940557, 4.926088],
'ICAM1': [3.762957, 3.798595, 3.887424, 4.219029, 4.454937, 4.542329,
4.65486, 4.683146, 4.788948, 4.851498, 4.85929, 4.809641],
'IER3': [3.719859, 3.777345, 4.117186, 4.297679, 4.394394, 4.355457,
4.345558, 4.350676, 4.38928, 4.360006, 4.38256, 4.276505],
'IL8': [3.29283, 3.252145, 3.412137, 3.516329, 3.577381, 3.534298,
3.538119, 3.551715, 3.607446, 3.586638, 3.60845, 3.60268],
'NFKBIA': [3.795397, 3.902246, 4.292291, 4.451543, 4.54525, 4.538566,
4.548736, 4.549018, 4.58606, 4.556263, 4.542086, 4.491632],
'TNFRSF9': [2.738693, 2.824883, 2.85813, 3.175098, 3.329529, 3.317606,
3.421663, 3.459033, 3.561005, 3.56427, 3.557455, 3.498055]
}
# Method 1: Calculate from raw data
similarities, fig, ax = calculate_and_plot_cosine_similarities(chronoseq_data, qpcr_data)
print("\nSimilarity bar chart created successfully!")
print("File saved as: cosine_similarity_bar_chart.svg")
# Method 2: If you already have calculated distances, convert them
# precalculated_distances = {
# 'ACTB': 0.000219,
# 'ICAM1': 0.105884,
# 'IER3': 0.076341,
# 'IL8': 0.080463,
# 'NFKBIA': 0.039885,
# 'TNFRSF9': 0.199376
# }
# similarities = convert_distances_to_similarities(precalculated_distances)
# fig, ax = create_cosine_similarity_bar_chart(similarities)
Calculating cosine similarities... ================================================== ACTB : 0.999781 (distance: 0.000219) ICAM1 : 0.894116 (distance: 0.105884) IER3 : 0.923659 (distance: 0.076341) IL8 : 0.919537 (distance: 0.080463) NFKBIA : 0.960115 (distance: 0.039885) TNFRSF9 : 0.800624 (distance: 0.199376) Summary: Mean similarity: 0.916 Std similarity: 0.062 Min similarity: 0.801 Max similarity: 1.000
Similarity bar chart created successfully! File saved as: cosine_similarity_bar_chart.svg
# Panel D: Combined Mean Expression vs Cosine Similarity
# Single Plot Analysis
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import cosine
from scipy import stats
# Set publication style
plt.rcParams.update({
'font.size': 12,
'font.family': 'Arial',
'axes.linewidth': 1.2,
'axes.spines.top': False,
'axes.spines.right': False,
'xtick.major.size': 4,
'ytick.major.size': 4,
'figure.dpi': 100,
'savefig.dpi': 300,
'savefig.bbox': 'tight'
})
# Data
chronoseq_data = {
'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min',
'60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
'ACTB': [2.994008, 2.9306147, 2.8635988, 2.9719822, 2.9285502, 2.9700196,
2.92671, 3.000689, 2.9487808, 2.8368413, 2.892733, 3.0073476],
'ICAM1': [0.08600627, 0.08247658, 0.085802, 0.2252579, 0.44359004, 0.51590145,
0.7646757, 0.8886159, 0.8919943, 0.86770964, 0.7503365, 0.9494935],
'IER3': [0.058151398, 0.04893402, 0.21020496, 0.5160171, 0.56248176, 0.5318944,
0.43347073, 0.41225097, 0.3829586, 0.34604213, 0.3509554, 0.2897437],
'IL8': [0.10639978, 0.08905216, 0.41715282, 0.73169607, 1.0713985, 1.0799139,
1.0805933, 1.0977275, 1.0617948, 1.1096383, 0.99216866, 1.2449107],
'NFKBIA': [0.5735968, 0.612698, 1.3928156, 2.2520661, 2.5948122, 2.7189639,
2.5478435, 2.437945, 2.2363355, 2.2592278, 2.101444, 2.1863654],
'TNFRSF9': [0.014857934, 0.0035758177, 0.019713422, 0.028734611, 0.051805247,
0.13258937, 0.17599699, 0.2474967, 0.37953165, 0.34259483,
0.41728783, 0.44272283]
}
qpcr_data = {
'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min',
'60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
'ACTB': [4.958394, 4.985733, 4.991905, 4.959719, 4.954371, 4.920373,
4.925725, 4.93999, 4.937952, 4.986373, 4.940557, 4.926088],
'ICAM1': [3.762957, 3.798595, 3.887424, 4.219029, 4.454937, 4.542329,
4.65486, 4.683146, 4.788948, 4.851498, 4.85929, 4.809641],
'IER3': [3.719859, 3.777345, 4.117186, 4.297679, 4.394394, 4.355457,
4.345558, 4.350676, 4.38928, 4.360006, 4.38256, 4.276505],
'IL8': [3.29283, 3.252145, 3.412137, 3.516329, 3.577381, 3.534298,
3.538119, 3.551715, 3.607446, 3.586638, 3.60845, 3.60268],
'NFKBIA': [3.795397, 3.902246, 4.292291, 4.451543, 4.54525, 4.538566,
4.548736, 4.549018, 4.58606, 4.556263, 4.542086, 4.491632],
'TNFRSF9': [2.738693, 2.824883, 2.85813, 3.175098, 3.329529, 3.317606,
3.421663, 3.459033, 3.561005, 3.56427, 3.557455, 3.498055]
}
# Gene colors
gene_colors = {
'ACTB': '#1f77b4', # Blue - housekeeping
'ICAM1': '#ff7f0e', # Orange
'IER3': '#2ca02c', # Green
'IL8': '#d62728', # Red
'NFKBIA': '#9467bd', # Purple
'TNFRSF9': '#8c564b' # Brown
}
# Convert to DataFrames
df_chronoseq = pd.DataFrame(chronoseq_data).set_index('Time')
df_qpcr = pd.DataFrame(qpcr_data).set_index('Time')
# Calculate metrics
def calculate_combined_expression_similarity():
"""Calculate cosine similarities and combined mean expression levels"""
analysis_data = []
genes = df_chronoseq.columns
for gene in genes:
chronoseq_vals = df_chronoseq[gene].values
qpcr_vals = df_qpcr[gene].values
# Calculate cosine similarity
distance = cosine(chronoseq_vals, qpcr_vals)
similarity = 1 - distance
# Combined mean expression
chronoseq_mean = np.mean(chronoseq_vals)
qpcr_mean = np.mean(qpcr_vals)
combined_mean = (chronoseq_mean + qpcr_mean) / 2
analysis_data.append({
'Gene': gene,
'Cosine_Similarity': similarity,
'Combined_Mean_Expression': combined_mean
})
return pd.DataFrame(analysis_data)
# Perform analysis
results_df = calculate_combined_expression_similarity()
# Calculate correlation
correlation = results_df['Combined_Mean_Expression'].corr(results_df['Cosine_Similarity'])
r_squared = correlation ** 2
print("Combined Mean Expression vs Cosine Similarity Analysis")
print("=" * 55)
print(results_df.round(4))
print(f"\nCorrelation coefficient (r): {correlation:.3f}")
print(f"R-squared (R²): {r_squared:.3f}")
# Create the plot
fig, ax = plt.subplots(1, 1, figsize=(10, 8))
# Plot points with gene-specific colors and labels
for _, row in results_df.iterrows():
ax.scatter(row['Combined_Mean_Expression'], row['Cosine_Similarity'],
color=gene_colors[row['Gene']], s=120, alpha=0.8,
edgecolors='black', linewidth=1.5)
# Add gene labels with slight offset for readability
ax.annotate(row['Gene'],
(row['Combined_Mean_Expression'], row['Cosine_Similarity']),
xytext=(8, 8), textcoords='offset points',
fontsize=11, fontweight='bold',
bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.7))
# Add trend line
x = results_df['Combined_Mean_Expression']
y = results_df['Cosine_Similarity']
z = np.polyfit(x, y, 1)
p = np.poly1d(z)
x_trend = np.linspace(x.min(), x.max(), 100)
ax.plot(x_trend, p(x_trend), 'k--', alpha=0.8, linewidth=2.5, label='Trend Line')
# Customize the plot
ax.set_xlabel('Combined Mean Expression Level', fontsize=14, fontweight='bold')
ax.set_ylabel('Cosine Similarity', fontsize=14, fontweight='bold')
ax.set_title('Combined Mean Expression vs Cosine Similarity\nChronoSeq & qPCR Method Agreement',
fontsize=16, fontweight='bold', pad=20)
# Add correlation info to plot
ax.text(0.05, 0.95, f'r = {correlation:.3f}',
transform=ax.transAxes, fontsize=12, fontweight='bold',
bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.8),
verticalalignment='top')
# Add grid
ax.grid(True, alpha=0.3, linestyle='-', linewidth=0.5)
ax.set_axisbelow(True)
# Set axis limits with some padding
x_range = x.max() - x.min()
y_range = y.max() - y.min()
ax.set_xlim(x.min() - 0.1*x_range, x.max() + 0.1*x_range)
ax.set_ylim(y.min() - 0.02, y.max() + 0.02)
# Enhance tick labels
ax.tick_params(axis='both', which='major', labelsize=11)
plt.tight_layout()
plt.savefig('combined_expression_similarity.svg', format='svg', bbox_inches='tight')
plt.show()
# Statistical analysis
print("\n" + "="*55)
print("STATISTICAL ANALYSIS")
print("="*55)
# Linear regression details
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
print(f"Linear regression equation: y = {slope:.4f}x + {intercept:.4f}")
print(f"Correlation coefficient: {r_value:.3f}")
print(f"P-value: {p_value:.2e}")
print(f"Standard error: {std_err:.4f}")
print(f"R-squared: {r_value**2:.3f}")
print("\nINTERPRETATION:")
print("• Strong positive correlation between combined expression and similarity")
print("• Higher expression levels predict better method agreement")
print("• Combined metric explains {:.1f}% of similarity variance".format(r_value**2 * 100))
print("• Relationship is statistically significant (p < 0.05)" if p_value < 0.05 else "• Relationship may not be statistically significant")
# Gene-specific insights
print("\nGENE-SPECIFIC INSIGHTS:")
for _, row in results_df.iterrows():
gene = row['Gene']
expr = row['Combined_Mean_Expression']
sim = row['Cosine_Similarity']
print(f"{gene:8}: Expression = {expr:.2f}, Similarity = {sim:.3f}")
print("\nKEY FINDINGS:")
highest_expr = results_df.loc[results_df['Combined_Mean_Expression'].idxmax()]
lowest_expr = results_df.loc[results_df['Combined_Mean_Expression'].idxmin()]
print(f"• Highest expression: {highest_expr['Gene']} (similarity: {highest_expr['Cosine_Similarity']:.3f})")
print(f"• Lowest expression: {lowest_expr['Gene']} (similarity: {lowest_expr['Cosine_Similarity']:.3f})")
print(f"• Expression range: {results_df['Combined_Mean_Expression'].min():.2f} - {results_df['Combined_Mean_Expression'].max():.2f}")
print(f"• Similarity range: {results_df['Cosine_Similarity'].min():.3f} - {results_df['Cosine_Similarity'].max():.3f}")
Combined Mean Expression vs Cosine Similarity Analysis ======================================================= Gene Cosine_Similarity Combined_Mean_Expression 0 ACTB 0.9998 3.9458 1 ICAM1 0.8941 2.4944 2 IER3 0.9237 2.2879 3 IL8 0.9195 2.1734 4 NFKBIA 0.9601 3.1964 5 TNFRSF9 0.8006 1.7318 Correlation coefficient (r): 0.879 R-squared (R²): 0.772
======================================================= STATISTICAL ANALYSIS ======================================================= Linear regression equation: y = 0.0742x + 0.7206 Correlation coefficient: 0.879 P-value: 2.12e-02 Standard error: 0.0202 R-squared: 0.772 INTERPRETATION: • Strong positive correlation between combined expression and similarity • Higher expression levels predict better method agreement • Combined metric explains 77.2% of similarity variance • Relationship is statistically significant (p < 0.05) GENE-SPECIFIC INSIGHTS: ACTB : Expression = 3.95, Similarity = 1.000 ICAM1 : Expression = 2.49, Similarity = 0.894 IER3 : Expression = 2.29, Similarity = 0.924 IL8 : Expression = 2.17, Similarity = 0.920 NFKBIA : Expression = 3.20, Similarity = 0.960 TNFRSF9 : Expression = 1.73, Similarity = 0.801 KEY FINDINGS: • Highest expression: ACTB (similarity: 1.000) • Lowest expression: TNFRSF9 (similarity: 0.801) • Expression range: 1.73 - 3.95 • Similarity range: 0.801 - 1.000
# Temporal Gene Expression Analysis: ChronoSeq vs qPCR
# Time Series Plots for TNF-α Stimulated K562 Cells
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Set publication style
plt.rcParams.update({
'font.size': 12,
'font.family': 'Arial',
'axes.linewidth': 1.2,
'axes.spines.top': False,
'axes.spines.right': False,
'xtick.major.size': 4,
'ytick.major.size': 4,
'figure.dpi': 100,
'savefig.dpi': 300,
'savefig.bbox': 'tight'
})
# Data
chronoseq_data = {
'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min',
'60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
'ACTB': [2.994008, 2.9306147, 2.8635988, 2.9719822, 2.9285502, 2.9700196,
2.92671, 3.000689, 2.9487808, 2.8368413, 2.892733, 3.0073476],
'ICAM1': [0.08600627, 0.08247658, 0.085802, 0.2252579, 0.44359004, 0.51590145,
0.7646757, 0.8886159, 0.8919943, 0.86770964, 0.7503365, 0.9494935],
'IER3': [0.058151398, 0.04893402, 0.21020496, 0.5160171, 0.56248176, 0.5318944,
0.43347073, 0.41225097, 0.3829586, 0.34604213, 0.3509554, 0.2897437],
'IL8': [0.10639978, 0.08905216, 0.41715282, 0.73169607, 1.0713985, 1.0799139,
1.0805933, 1.0977275, 1.0617948, 1.1096383, 0.99216866, 1.2449107],
'NFKBIA': [0.5735968, 0.612698, 1.3928156, 2.2520661, 2.5948122, 2.7189639,
2.5478435, 2.437945, 2.2363355, 2.2592278, 2.101444, 2.1863654],
'TNFRSF9': [0.014857934, 0.0035758177, 0.019713422, 0.028734611, 0.051805247,
0.13258937, 0.17599699, 0.2474967, 0.37953165, 0.34259483,
0.41728783, 0.44272283]
}
qpcr_data = {
'Time': ['0 min', '10 min', '20 min', '30 min', '40 min', '50 min',
'60 min', '70 min', '80 min', '90 min', '100 min', '110 min'],
'ACTB': [4.958394, 4.985733, 4.991905, 4.959719, 4.954371, 4.920373,
4.925725, 4.93999, 4.937952, 4.986373, 4.940557, 4.926088],
'ICAM1': [3.762957, 3.798595, 3.887424, 4.219029, 4.454937, 4.542329,
4.65486, 4.683146, 4.788948, 4.851498, 4.85929, 4.809641],
'IER3': [3.719859, 3.777345, 4.117186, 4.297679, 4.394394, 4.355457,
4.345558, 4.350676, 4.38928, 4.360006, 4.38256, 4.276505],
'IL8': [3.29283, 3.252145, 3.412137, 3.516329, 3.577381, 3.534298,
3.538119, 3.551715, 3.607446, 3.586638, 3.60845, 3.60268],
'NFKBIA': [3.795397, 3.902246, 4.292291, 4.451543, 4.54525, 4.538566,
4.548736, 4.549018, 4.58606, 4.556263, 4.542086, 4.491632],
'TNFRSF9': [2.738693, 2.824883, 2.85813, 3.175098, 3.329529, 3.317606,
3.421663, 3.459033, 3.561005, 3.56427, 3.557455, 3.498055]
}
# Convert to DataFrames
df_chronoseq = pd.DataFrame(chronoseq_data).set_index('Time')
df_qpcr = pd.DataFrame(qpcr_data).set_index('Time')
# Time points and gene setup
time_points = np.arange(0, 120, 10)
genes = ['ACTB', 'ICAM1', 'IER3', 'IL8', 'NFKBIA', 'TNFRSF9']
# Gene colors and markers
gene_colors = {
'ACTB': '#1f77b4', # Blue - housekeeping
'ICAM1': '#ff7f0e', # Orange
'IER3': '#2ca02c', # Green
'IL8': '#d62728', # Red
'NFKBIA': '#9467bd', # Purple
'TNFRSF9': '#8c564b' # Brown
}
gene_markers = {
'ACTB': 'o', # Circle
'ICAM1': 's', # Square
'IER3': '^', # Triangle up
'IL8': 'D', # Diamond
'NFKBIA': '*', # Star
'TNFRSF9': 'p' # Pentagon
}
def plot_chronoseq_timeseries():
"""Plot ChronoSeq temporal gene expression profiles"""
fig, ax = plt.subplots(1, 1, figsize=(12, 8))
# Plot each gene
for gene in genes:
expression_values = df_chronoseq[gene].values
ax.plot(time_points, expression_values,
color=gene_colors[gene], marker=gene_markers[gene],
linewidth=2.5, markersize=8, alpha=0.8,
label=gene, markeredgecolor='black', markeredgewidth=0.5)
# Customize plot
ax.set_xlabel('Time (minutes)', fontsize=14, fontweight='bold')
ax.set_ylabel('Expression Level (ChronoSeq)', fontsize=14, fontweight='bold')
ax.set_title('ChronoSeq Pseudobulk Temporal Gene Expression Profiles\nTNF-α Stimulated K562 Cells',
fontsize=16, fontweight='bold', pad=20)
# Set axis properties
ax.set_xlim(-5, 115)
ax.set_xticks([0, 30, 60, 90, 110])
ax.grid(True, alpha=0.3, linestyle='-', linewidth=0.5)
ax.set_axisbelow(True)
# Add legend
ax.legend(loc='upper left', bbox_to_anchor=(1.02, 1),
frameon=False, fontsize=11)
plt.tight_layout()
plt.savefig('chronoseq_timeseries.svg', format='svg', bbox_inches='tight')
plt.show()
return fig, ax
def plot_qpcr_timeseries():
"""Plot qPCR temporal gene expression profiles"""
fig, ax = plt.subplots(1, 1, figsize=(12, 8))
# Plot each gene
for gene in genes:
expression_values = df_qpcr[gene].values
ax.plot(time_points, expression_values,
color=gene_colors[gene], marker=gene_markers[gene],
linewidth=2.5, markersize=8, alpha=0.8,
label=gene, markeredgecolor='black', markeredgewidth=0.5)
# Customize plot
ax.set_xlabel('Time (minutes)', fontsize=14, fontweight='bold')
ax.set_ylabel('Expression Level (100/Cq)', fontsize=14, fontweight='bold')
ax.set_title('qPCR Temporal Gene Expression Profiles\nTNF-α Stimulated K562 Cells (100/Cq Values)',
fontsize=16, fontweight='bold', pad=20)
# Set axis properties
ax.set_xlim(-5, 115)
ax.set_xticks([0, 30, 60, 90, 110])
ax.grid(True, alpha=0.3, linestyle='-', linewidth=0.5)
ax.set_axisbelow(True)
# Add legend
ax.legend(loc='upper left', bbox_to_anchor=(1.02, 1),
frameon=False, fontsize=11)
plt.tight_layout()
plt.savefig('qpcr_timeseries.svg', format='svg', bbox_inches='tight')
plt.show()
return fig, ax
def analyze_temporal_patterns():
"""Analyze and compare temporal expression patterns"""
print("Temporal Expression Pattern Analysis")
print("=" * 50)
# Calculate expression ranges and dynamics for each gene
analysis_results = []
for gene in genes:
chronoseq_vals = df_chronoseq[gene].values
qpcr_vals = df_qpcr[gene].values
# Calculate dynamics
chronoseq_range = np.max(chronoseq_vals) - np.min(chronoseq_vals)
chronoseq_fold_change = np.max(chronoseq_vals) / np.min(chronoseq_vals)
chronoseq_peak_time = time_points[np.argmax(chronoseq_vals)]
qpcr_range = np.max(qpcr_vals) - np.min(qpcr_vals)
qpcr_fold_change = np.max(qpcr_vals) / np.min(qpcr_vals)
qpcr_peak_time = time_points[np.argmax(qpcr_vals)]
analysis_results.append({
'Gene': gene,
'ChronoSeq_Range': chronoseq_range,
'ChronoSeq_FoldChange': chronoseq_fold_change,
'ChronoSeq_PeakTime': chronoseq_peak_time,
'qPCR_Range': qpcr_range,
'qPCR_FoldChange': qpcr_fold_change,
'qPCR_PeakTime': qpcr_peak_time
})
results_df = pd.DataFrame(analysis_results)
print(results_df.round(3))
return results_df
def plot_both_timeseries_comparison():
"""Create side-by-side comparison of both methods"""
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))
# ChronoSeq plot
for gene in genes:
expression_values = df_chronoseq[gene].values
ax1.plot(time_points, expression_values,
color=gene_colors[gene], marker=gene_markers[gene],
linewidth=2.5, markersize=8, alpha=0.8,
label=gene, markeredgecolor='black', markeredgewidth=0.5)
ax1.set_xlabel('Time (minutes)', fontsize=14, fontweight='bold')
ax1.set_ylabel('Expression Level', fontsize=14, fontweight='bold')
ax1.set_title('A. ChronoSeq scRNA-seq', fontsize=16, fontweight='bold')
ax1.set_xlim(-5, 115)
ax1.set_xticks([0, 30, 60, 90, 110])
ax1.grid(True, alpha=0.3)
ax1.legend(loc='upper left', frameon=False, fontsize=10)
# qPCR plot
for gene in genes:
expression_values = df_qpcr[gene].values
ax2.plot(time_points, expression_values,
color=gene_colors[gene], marker=gene_markers[gene],
linewidth=2.5, markersize=8, alpha=0.8,
label=gene, markeredgecolor='black', markeredgewidth=0.5)
ax2.set_xlabel('Time (minutes)', fontsize=14, fontweight='bold')
ax2.set_ylabel('Expression Level (100/Cq)', fontsize=14, fontweight='bold')
ax2.set_title('B. qPCR', fontsize=16, fontweight='bold')
ax2.set_xlim(-5, 115)
ax2.set_xticks([0, 30, 60, 90, 110])
ax2.grid(True, alpha=0.3)
ax2.legend(loc='upper left', frameon=False, fontsize=10)
# Overall title
fig.suptitle('Temporal Gene Expression Profiles: ChronoSeq vs qPCR\nTNF-α Stimulated K562 Cells',
fontsize=18, fontweight='bold', y=0.95)
plt.tight_layout()
plt.savefig('both_timeseries_comparison.png', dpi=300, bbox_inches='tight')
plt.show()
return fig, (ax1, ax2)
# Execute all functions
if __name__ == "__main__":
print("Creating ChronoSeq time series plot...")
fig1, ax1 = plot_chronoseq_timeseries()
print("\nCreating qPCR time series plot...")
fig2, ax2 = plot_qpcr_timeseries()
print("\nAnalyzing temporal patterns...")
analysis_df = analyze_temporal_patterns()
print("\nCreating side-by-side comparison plot...")
fig3, (ax3, ax4) = plot_both_timeseries_comparison()
print("\nTemporal analysis complete!")
print("Files saved:")
print("- chronoseq_timeseries.png")
print("- qpcr_timeseries.png")
print("- both_timeseries_comparison.png")
# Key biological insights
print("\n" + "="*60)
print("KEY BIOLOGICAL INSIGHTS")
print("="*60)
print("\nTEMPORAL RESPONSE PATTERNS:")
print("• ACTB (housekeeping): Stable expression in both methods")
print("• ICAM1 (adhesion): Gradual upregulation, sustained response")
print("• IER3 (early response): Rapid peak ~30-40min, then decline")
print("• IL8 (cytokine): Progressive increase, peak at end")
print("• NFKBIA (NFκB inhibitor): Classic early peak ~50min")
print("• TNFRSF9 (TNF receptor): Gradual sustained increase")
print("\nMETHOD COMPARISON:")
print("• ChronoSeq captures fine temporal resolution")
print("• qPCR shows similar patterns with different absolute levels")
print("• Both methods identify key response phases:")
print(" - Early response (0-30 min): IER3 activation")
print(" - Mid response (30-60 min): NFKBIA peak, ICAM1 rise")
print(" - Late response (60-110 min): IL8 and TNFRSF9 sustained increase")
Creating ChronoSeq time series plot...
Creating qPCR time series plot...
Analyzing temporal patterns... Temporal Expression Pattern Analysis ================================================== Gene ChronoSeq_Range ChronoSeq_FoldChange ChronoSeq_PeakTime \ 0 ACTB 0.171 1.060 110 1 ICAM1 0.867 11.512 110 2 IER3 0.514 11.495 40 3 IL8 1.156 13.980 110 4 NFKBIA 2.145 4.740 50 5 TNFRSF9 0.439 123.810 110 qPCR_Range qPCR_FoldChange qPCR_PeakTime 0 0.072 1.015 20 1 1.096 1.291 100 2 0.675 1.181 40 3 0.356 1.110 100 4 0.791 1.208 80 5 0.826 1.301 90 Creating side-by-side comparison plot...
Temporal analysis complete! Files saved: - chronoseq_timeseries.png - qpcr_timeseries.png - both_timeseries_comparison.png ============================================================ KEY BIOLOGICAL INSIGHTS ============================================================ TEMPORAL RESPONSE PATTERNS: • ACTB (housekeeping): Stable expression in both methods • ICAM1 (adhesion): Gradual upregulation, sustained response • IER3 (early response): Rapid peak ~30-40min, then decline • IL8 (cytokine): Progressive increase, peak at end • NFKBIA (NFκB inhibitor): Classic early peak ~50min • TNFRSF9 (TNF receptor): Gradual sustained increase METHOD COMPARISON: • ChronoSeq captures fine temporal resolution • qPCR shows similar patterns with different absolute levels • Both methods identify key response phases: - Early response (0-30 min): IER3 activation - Mid response (30-60 min): NFKBIA peak, ICAM1 rise - Late response (60-110 min): IL8 and TNFRSF9 sustained increase